Note: Some of the figures in this document use grid.arrange or ggarrange. The heights/widths arguments in grid.arrange() and ggarrange() may have to be tweaked to get the proportions of the panels to look less lopsided.

1 Setup

## [[1]]
##  [1] "DESeq2"               "SummarizedExperiment" "DelayedArray"        
##  [4] "BiocParallel"         "matrixStats"          "Biobase"             
##  [7] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [10] "S4Vectors"            "BiocGenerics"         "parallel"            
## [13] "stats4"               "stats"                "graphics"            
## [16] "grDevices"            "utils"                "datasets"            
## [19] "methods"              "base"                
## 
## [[2]]
##  [1] "edgeR"                "limma"                "DESeq2"              
##  [4] "SummarizedExperiment" "DelayedArray"         "BiocParallel"        
##  [7] "matrixStats"          "Biobase"              "GenomicRanges"       
## [10] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [13] "BiocGenerics"         "parallel"             "stats4"              
## [16] "stats"                "graphics"             "grDevices"           
## [19] "utils"                "datasets"             "methods"             
## [22] "base"                
## 
## [[3]]
##  [1] "edgeR"                "limma"                "DESeq2"              
##  [4] "SummarizedExperiment" "DelayedArray"         "BiocParallel"        
##  [7] "matrixStats"          "Biobase"              "GenomicRanges"       
## [10] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [13] "BiocGenerics"         "parallel"             "stats4"              
## [16] "stats"                "graphics"             "grDevices"           
## [19] "utils"                "datasets"             "methods"             
## [22] "base"                
## 
## [[4]]
##  [1] "forcats"              "stringr"              "dplyr"               
##  [4] "purrr"                "readr"                "tidyr"               
##  [7] "tibble"               "ggplot2"              "tidyverse"           
## [10] "edgeR"                "limma"                "DESeq2"              
## [13] "SummarizedExperiment" "DelayedArray"         "BiocParallel"        
## [16] "matrixStats"          "Biobase"              "GenomicRanges"       
## [19] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [22] "BiocGenerics"         "parallel"             "stats4"              
## [25] "stats"                "graphics"             "grDevices"           
## [28] "utils"                "datasets"             "methods"             
## [31] "base"                
## 
## [[5]]
##  [1] "combinat"             "forcats"              "stringr"             
##  [4] "dplyr"                "purrr"                "readr"               
##  [7] "tidyr"                "tibble"               "ggplot2"             
## [10] "tidyverse"            "edgeR"                "limma"               
## [13] "DESeq2"               "SummarizedExperiment" "DelayedArray"        
## [16] "BiocParallel"         "matrixStats"          "Biobase"             
## [19] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [22] "S4Vectors"            "BiocGenerics"         "parallel"            
## [25] "stats4"               "stats"                "graphics"            
## [28] "grDevices"            "utils"                "datasets"            
## [31] "methods"              "base"                
## 
## [[6]]
##  [1] "ggrepel"              "combinat"             "forcats"             
##  [4] "stringr"              "dplyr"                "purrr"               
##  [7] "readr"                "tidyr"                "tibble"              
## [10] "ggplot2"              "tidyverse"            "edgeR"               
## [13] "limma"                "DESeq2"               "SummarizedExperiment"
## [16] "DelayedArray"         "BiocParallel"         "matrixStats"         
## [19] "Biobase"              "GenomicRanges"        "GenomeInfoDb"        
## [22] "IRanges"              "S4Vectors"            "BiocGenerics"        
## [25] "parallel"             "stats4"               "stats"               
## [28] "graphics"             "grDevices"            "utils"               
## [31] "datasets"             "methods"              "base"                
## 
## [[7]]
##  [1] "org.Hs.eg.db"         "AnnotationDbi"        "ggrepel"             
##  [4] "combinat"             "forcats"              "stringr"             
##  [7] "dplyr"                "purrr"                "readr"               
## [10] "tidyr"                "tibble"               "ggplot2"             
## [13] "tidyverse"            "edgeR"                "limma"               
## [16] "DESeq2"               "SummarizedExperiment" "DelayedArray"        
## [19] "BiocParallel"         "matrixStats"          "Biobase"             
## [22] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [25] "S4Vectors"            "BiocGenerics"         "parallel"            
## [28] "stats4"               "stats"                "graphics"            
## [31] "grDevices"            "utils"                "datasets"            
## [34] "methods"              "base"                
## 
## [[8]]
##  [1] "org.Hs.eg.db"         "AnnotationDbi"        "ggrepel"             
##  [4] "combinat"             "forcats"              "stringr"             
##  [7] "dplyr"                "purrr"                "readr"               
## [10] "tidyr"                "tibble"               "ggplot2"             
## [13] "tidyverse"            "edgeR"                "limma"               
## [16] "DESeq2"               "SummarizedExperiment" "DelayedArray"        
## [19] "BiocParallel"         "matrixStats"          "Biobase"             
## [22] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [25] "S4Vectors"            "BiocGenerics"         "parallel"            
## [28] "stats4"               "stats"                "graphics"            
## [31] "grDevices"            "utils"                "datasets"            
## [34] "methods"              "base"                
## 
## [[9]]
##  [1] "ggpubr"               "magrittr"             "org.Hs.eg.db"        
##  [4] "AnnotationDbi"        "ggrepel"              "combinat"            
##  [7] "forcats"              "stringr"              "dplyr"               
## [10] "purrr"                "readr"                "tidyr"               
## [13] "tibble"               "ggplot2"              "tidyverse"           
## [16] "edgeR"                "limma"                "DESeq2"              
## [19] "SummarizedExperiment" "DelayedArray"         "BiocParallel"        
## [22] "matrixStats"          "Biobase"              "GenomicRanges"       
## [25] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [28] "BiocGenerics"         "parallel"             "stats4"              
## [31] "stats"                "graphics"             "grDevices"           
## [34] "utils"                "datasets"             "methods"             
## [37] "base"                
## 
## [[10]]
##  [1] "fgsea"                "Rcpp"                 "ggpubr"              
##  [4] "magrittr"             "org.Hs.eg.db"         "AnnotationDbi"       
##  [7] "ggrepel"              "combinat"             "forcats"             
## [10] "stringr"              "dplyr"                "purrr"               
## [13] "readr"                "tidyr"                "tibble"              
## [16] "ggplot2"              "tidyverse"            "edgeR"               
## [19] "limma"                "DESeq2"               "SummarizedExperiment"
## [22] "DelayedArray"         "BiocParallel"         "matrixStats"         
## [25] "Biobase"              "GenomicRanges"        "GenomeInfoDb"        
## [28] "IRanges"              "S4Vectors"            "BiocGenerics"        
## [31] "parallel"             "stats4"               "stats"               
## [34] "graphics"             "grDevices"            "utils"               
## [37] "datasets"             "methods"              "base"                
## 
## [[11]]
##  [1] "ggbeeswarm"           "fgsea"                "Rcpp"                
##  [4] "ggpubr"               "magrittr"             "org.Hs.eg.db"        
##  [7] "AnnotationDbi"        "ggrepel"              "combinat"            
## [10] "forcats"              "stringr"              "dplyr"               
## [13] "purrr"                "readr"                "tidyr"               
## [16] "tibble"               "ggplot2"              "tidyverse"           
## [19] "edgeR"                "limma"                "DESeq2"              
## [22] "SummarizedExperiment" "DelayedArray"         "BiocParallel"        
## [25] "matrixStats"          "Biobase"              "GenomicRanges"       
## [28] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [31] "BiocGenerics"         "parallel"             "stats4"              
## [34] "stats"                "graphics"             "grDevices"           
## [37] "utils"                "datasets"             "methods"             
## [40] "base"                
## 
## [[12]]
##  [1] "MESS"                 "geeM"                 "Matrix"              
##  [4] "geepack"              "ggbeeswarm"           "fgsea"               
##  [7] "Rcpp"                 "ggpubr"               "magrittr"            
## [10] "org.Hs.eg.db"         "AnnotationDbi"        "ggrepel"             
## [13] "combinat"             "forcats"              "stringr"             
## [16] "dplyr"                "purrr"                "readr"               
## [19] "tidyr"                "tibble"               "ggplot2"             
## [22] "tidyverse"            "edgeR"                "limma"               
## [25] "DESeq2"               "SummarizedExperiment" "DelayedArray"        
## [28] "BiocParallel"         "matrixStats"          "Biobase"             
## [31] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [34] "S4Vectors"            "BiocGenerics"         "parallel"            
## [37] "stats4"               "stats"                "graphics"            
## [40] "grDevices"            "utils"                "datasets"            
## [43] "methods"              "base"                
## 
## [[13]]
##  [1] "pbmcapply"            "MESS"                 "geeM"                
##  [4] "Matrix"               "geepack"              "ggbeeswarm"          
##  [7] "fgsea"                "Rcpp"                 "ggpubr"              
## [10] "magrittr"             "org.Hs.eg.db"         "AnnotationDbi"       
## [13] "ggrepel"              "combinat"             "forcats"             
## [16] "stringr"              "dplyr"                "purrr"               
## [19] "readr"                "tidyr"                "tibble"              
## [22] "ggplot2"              "tidyverse"            "edgeR"               
## [25] "limma"                "DESeq2"               "SummarizedExperiment"
## [28] "DelayedArray"         "BiocParallel"         "matrixStats"         
## [31] "Biobase"              "GenomicRanges"        "GenomeInfoDb"        
## [34] "IRanges"              "S4Vectors"            "BiocGenerics"        
## [37] "parallel"             "stats4"               "stats"               
## [40] "graphics"             "grDevices"            "utils"               
## [43] "datasets"             "methods"              "base"

2 DEG analysis

2.6 Get GSEA scores for MSigDB oncogenic signatures

First coerce the deg results from DEseq2 into a list of ranks for each comparison using the method described here: https://stephenturner.github.io/deseq-to-fgsea/

Then, we will use fgsea to perform gene set enrichment on all the MSigDB oncogenic signatures.

Generate the enrichment scores & coerce to dataframe

Select the top pathways, filter irrelevant terms, and order the pathways via hierarchical clustering.

# select top pathways & order pathways via hclust
data_list <- lapply(all_fgsea, function(x){
  sig_name <- levels(as.factor(x$signature_name))
  x$pathway <- gsub(paste0(sig_name, '_'), '', x=x$pathway)
  x$sample_comparison <- factor(x$sample_comparison, levels=names(deg_ranks))
  
  df <- x %>%
  filter(padj < 0.05) %>% arrange(padj) %>% group_by(sample_comparison) %>%
  top_n(10, wt=-padj) %>% top_n(10, wt=abs(NES)) %>%
  arrange(abs(NES), -padj) 

  tmp <- df %>% dplyr::select(sample_comparison, pathway, NES) %>%
  group_by(sample_comparison) %>% tidyr::spread(pathway, NES) %>% 
  as.data.frame()
  
  names <- tmp$sample_comparison
  tmp$sample_comparison <- NULL
  tmp <- as.data.frame(lapply(tmp, function(x){
    x[is.na(x)] <- 0
    return(x) }))
  
  rownames(tmp) <- names
  order <- hclust( dist(t(tmp), method = "euclidean"), method = "ward.D" )$order
  df$pathway <- factor(df$pathway,levels=colnames(tmp)[order])


  df <- df[!is.na(df$pathway),]
  df <- df[df$pathway != 'NA',]

  rm(names, tmp)

  return(df)
})

# define terms that are not relevant, and will be hidden in the resulting visualization
terms <- c('INFLUENZA', 'CELL_CYCLE', 'MITOTIC', 'S_PHASE', 'G1_S_TRANSITION', 'ASTHMA', 'AUTOIMMUNE_THYROID_DISEASE',
'INFECTION','DISEASE', 'INTESTINAL_IMMUNE_NETWORK_FOR_IGA_PRODUCTION','METABOLISM', 'APICAL_SURFACE','GENESIS',
 '43S', 'SYNAP','NEUROTRANSMITTER', 'GABA', 'GLUTAMATE', 'POTASSIUM_CHANNELS', 'CHANNEL_TRANSPORT', 'CELL_LINEAGE',
 'LUPUS', 'NEURONAL',  'RP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE', 'TRANSLATION', 'proteasome', 'complement', 'homologous_recombination',
"peptide_chain",'second_messenger','NONSENSE_MEDIATED_DECAY', 'G2M_checkpoint', 'G2_M_CHECKPOINT', 'IMMUNOREGULATORY_INTERACTIONS_BETWEEN',
'RIBOSOME', 'TIGHT_JUNCTION') 

# apply filters
data_filt <- lapply(data_list, function(x){

  y <- x
  for (term in terms){
    y <- y %>% filter(!grepl(term, x=pathway, ignore.case=TRUE))
  }

df <- y

tmp <- df %>% dplyr::select(sample_comparison, pathway, NES) %>%
  group_by(sample_comparison) %>% tidyr::spread(pathway, NES) %>% 
  as.data.frame()
  
  names <- tmp$sample_comparison
  tmp$sample_comparison <- NULL
  tmp <- as.data.frame(lapply(tmp, function(x){
    x[is.na(x)] <- 0
    return(x) }))
  
  rownames(tmp) <- names
  order <- hclust( dist(t(tmp), method = "euclidean"), method = "ward.D" )$order
  df$pathway <- factor(df$pathway,levels=colnames(tmp)[order])


  df <- df[!is.na(df$pathway),]
  df <- df[df$pathway != 'NA',]

  rm(names, tmp)

  return(df)

})

3 Integrate TCGA expression information

3.2 With distributions of specific checkpoint and immune genes

Objective: get a measure (a p-value) of whether the variance between my four samples is greater than if you were to take the variance four random TCGA samples. To do this, do resamplings of 10% of the total possible combinations of 4 tcga-GBM samples from a pool of 155 samples.

## [1] 20811575
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## # A tibble: 9 x 2
##     pval gene  
##    <dbl> <fct> 
## 1 0.249  CD28  
## 2 0.413  CD3   
## 3 0.313  CD4   
## 4 0.0957 DCLAMP
## 5 0.0118 FOXP3 
## 6 0.211  PD1   
## 7 0.0409 PDL1  
## 8 0.0828 PDL2  
## 9 0.0882 TIM3

4 RSession Info

## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] pbmcapply_1.5.0             MESS_0.5.5                 
##  [3] geeM_0.10.1                 Matrix_1.2-17              
##  [5] geepack_1.2-1               ggbeeswarm_0.6.0           
##  [7] fgsea_1.8.0                 Rcpp_1.0.2                 
##  [9] ggpubr_0.2.3                magrittr_1.5               
## [11] org.Hs.eg.db_3.7.0          AnnotationDbi_1.44.0       
## [13] ggrepel_0.8.1               combinat_0.0-8             
## [15] forcats_0.4.0               stringr_1.4.0              
## [17] dplyr_0.8.3                 purrr_0.3.2                
## [19] readr_1.3.1                 tidyr_0.8.3                
## [21] tibble_2.1.3                ggplot2_3.2.1              
## [23] tidyverse_1.2.1             edgeR_3.24.3               
## [25] limma_3.38.3                DESeq2_1.22.2              
## [27] SummarizedExperiment_1.12.0 DelayedArray_0.8.0         
## [29] BiocParallel_1.16.6         matrixStats_0.55.0         
## [31] Biobase_2.42.0              GenomicRanges_1.34.0       
## [33] GenomeInfoDb_1.18.2         IRanges_2.16.0             
## [35] S4Vectors_0.20.1            BiocGenerics_0.28.0        
## 
## loaded via a namespace (and not attached):
##  [1] TH.data_1.0-10         colorspace_1.4-1       ggsignif_0.6.0        
##  [4] estimability_1.3       htmlTable_1.13.1       XVector_0.22.0        
##  [7] base64enc_0.1-3        rstudioapi_0.10        bit64_0.9-7           
## [10] fansi_0.4.0            mvtnorm_1.0-11         lubridate_1.7.4       
## [13] xml2_1.2.2             codetools_0.2-16       splines_3.5.2         
## [16] geneplotter_1.60.0     knitr_1.24             zeallot_0.1.0         
## [19] Formula_1.2-3          jsonlite_1.6           broom_0.5.2           
## [22] annotate_1.60.1        cluster_2.1.0          compiler_3.5.2        
## [25] httr_1.4.1             emmeans_1.4            lsmeans_2.30-0        
## [28] backports_1.1.4        assertthat_0.2.1       lazyeval_0.2.2        
## [31] cli_1.1.0              acepack_1.4.1          htmltools_0.3.6       
## [34] tools_3.5.2            gtable_0.3.0           glue_1.3.1            
## [37] GenomeInfoDbData_1.2.0 fastmatch_1.1-0        cellranger_1.1.0      
## [40] vctrs_0.2.0            nlme_3.1-141           xfun_0.9              
## [43] rvest_0.3.4            qusage_2.16.1          XML_3.98-1.20         
## [46] MASS_7.3-51.4          zoo_1.8-6              zlibbioc_1.28.0       
## [49] scales_1.0.0           hms_0.5.1              sandwich_2.5-1        
## [52] RColorBrewer_1.1-2     yaml_2.2.0             memoise_1.1.0         
## [55] gridExtra_2.3          rpart_4.1-15           latticeExtra_0.6-28   
## [58] stringi_1.4.3          RSQLite_2.1.2          genefilter_1.64.0     
## [61] checkmate_1.9.4        rlang_0.4.0            pkgconfig_2.0.2       
## [64] bitops_1.0-6           evaluate_0.14          lattice_0.20-38       
## [67] labeling_0.3           htmlwidgets_1.3        cowplot_1.0.0         
## [70] bit_1.1-14             tidyselect_0.2.5       R6_2.4.0              
## [73] generics_0.0.2         Hmisc_4.2-0            multcomp_1.4-10       
## [76] DBI_1.0.0              pillar_1.4.2           haven_2.1.1           
## [79] foreign_0.8-72         withr_2.1.2            survival_2.44-1.1     
## [82] RCurl_1.95-4.12        nnet_7.3-12            modelr_0.1.5          
## [85] crayon_1.3.4           utf8_1.1.4             rmarkdown_1.15        
## [88] locfit_1.5-9.1         grid_3.5.2             readxl_1.3.1          
## [91] data.table_1.12.2      blob_1.2.0             digest_0.6.20         
## [94] xtable_1.8-4           munsell_0.5.0          beeswarm_0.2.3        
## [97] vipor_0.4.5